---
title: "Scheduled Fifth and Scheduled Sixth Areas at the Subdistrict Level"
author: "Pratik Mahajan"
date: "2024-06-21"
output:
  pdf_document:
    fig_caption: yes
    fig_height: 20
    fig_width: 14
---

```{r setup, include=FALSE}
# Default for every chunk in this document: do not echo the R source,
# so only chunk output (figures, tables) can appear in the rendered file.
knitr::opts_chunk$set(echo = FALSE)
```

```{r,include=FALSE}
library(sf)
library(dplyr)
library(ggplot2)
library(ggthemes)  
library(patchwork)
library(stringr)
library(knitr)
```


# Kindly download the 2011 subdistrict polygon shapefiles from SHRUG


```{r, include=FALSE}
# Read the two inputs, both referenced by bare file name:
#   - the scheduled-area coding table (one row per subdistrict), and
#   - the subdistrict polygon layer.
scheduled_data <- read.csv(file = "scheduled_areas_by_subdistrict_india.csv")
shape_file <- st_read(dsn = "subdistrict.shp")

# Quick per-column overview of the coding table (chunk output is hidden).
summary(scheduled_data)
```

```{r, include=FALSE}


# Give the shapefile's abbreviated id columns the long names used in the CSV
# so that both tables can be joined on identically named keys.
shape_file <- rename(
  shape_file,
  pc11_subdistrict_id = pc11_sd_id,
  pc11_district_id = pc11_d_id,
  pc11_state_id = pc11_s_id
)

# Left-pad an id with zeros to a fixed width (e.g. 7 -> "07" for width 2).
pad_id <- function(id, width) {
  str_pad(id, width = width, side = "left", pad = "0")
}

# Zero-pad the CSV ids to 2 / 3 / 5 characters.
# NOTE(review): presumably this matches the zero-padded string ids stored in
# the shapefile - confirm against the SHRUG layer.
scheduled_data <- scheduled_data %>%
  mutate(
    pc11_state_id = pad_id(pc11_state_id, 2),
    pc11_district_id = pad_id(pc11_district_id, 3),
    pc11_subdistrict_id = pad_id(pc11_subdistrict_id, 5)
  )

# Attach the scheduled-area attributes to every polygon; polygons without a
# matching CSV row are kept (left join) with NA attributes.
id_keys <- c("pc11_subdistrict_id", "pc11_district_id", "pc11_state_id")
scheduled_map <- left_join(shape_file, scheduled_data, by = id_keys)

# Drop the shapefile's own subdistrict name column.
scheduled_map <- select(scheduled_map, -"sd_name")

# Frequency of each PESA-rules publication year after the join.
table(scheduled_map$pesa_rules_published_state_year)
```

```{r, fig.width=16, fig.height=20, include=FALSE}
# Repair invalid geometries, then cast every feature to MULTIPOLYGON.
scheduled_map <- scheduled_map %>%
  st_make_valid() %>%
  st_cast("MULTIPOLYGON")

# Dissolve subdistricts into one outline per state for the border overlay.
state_borders <- scheduled_map %>%
  group_by(state_name) %>%
  summarise(geometry = st_union(geometry), .groups = "drop")
```

# About the Dataset

This document presents a brief overview of the Scheduled Fifth and Scheduled Sixth Areas of India at the subdistrict level (blocks/mandals/tehsils). The dataset is an original compilation of lists released by the Fifth Scheduled states in their Annual Reports on Scheduled Areas, up until 2012. For Sixth Scheduled states, I refer to the post-1986 division of the state of Assam and the subsequent subdistricts that today come under Sixth Scheduled Areas. For both of these areas, any subdistrict containing at least one scheduled village was coded as scheduled. As most of these documents are scanned copies, they were manually coded to match the state, district and subdistrict identifiers of the SHRUG as per Census 2011. All remaining omissions and mistakes are the author's.

\newpage

# Overview of Scheduled Areas in India at the subdistrict level

```{r, fig.width=16, fig.height=20}
# Choropleth of all subdistricts filled by scheduled status, with the
# dissolved state outlines drawn on top.
#
# Border widths are set with `linewidth`: from ggplot2 3.4.0 onwards `size`
# only scales point geometries in geom_sf(), so `size` no longer thins the
# subdistrict borders or emphasises the state borders.
status_colours <- c(
  "Fifth Schedule" = "lightgreen",
  "Sixth Schedule" = "#6e0280",
  "Not Scheduled" = "grey"
)

scheduled_status_map <- ggplot() +
  geom_sf(
    data = scheduled_map,
    aes(geometry = geometry, fill = scheduled_status),
    color = "darkgrey",
    linewidth = 0.1
  ) +
  geom_sf(
    data = state_borders,
    aes(geometry = geometry),
    color = "black",
    fill = NA,
    linewidth = 0.5
  ) +
  scale_fill_manual(values = status_colours) +
  labs(
    title = "Subdistricts in India by Scheduled Status as of 2011",
    subtitle = "Pratik Mahajan (2024)",
    fill = "Scheduled Status"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.text = element_text(size = 20),
    plot.title = element_text(hjust = 0.5, size = 30),
    plot.subtitle = element_text(hjust = 0.5, size = 24),
    plot.caption = element_text(size = 12),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank()
  )

# Render the figure in the document.
scheduled_status_map

# Save the same plot object explicitly rather than relying on last_plot().
ggsave(
  "subdistricts_scheduled_map.png",
  plot = scheduled_status_map,
  width = 14,
  height = 10,
  dpi = 300
)

```

\newpage

## Summary table of Scheduled Subdistricts

```{r, include=FALSE}
# Number and share of subdistricts per scheduled status, largest group first.
status_counts <- scheduled_data %>%
  count(scheduled_status) %>%
  arrange(desc(n)) %>%
  mutate(percentage = round(n / sum(n) * 100, 2))

# Closing "Total" row: overall count and, by construction, 100 percent.
total_row <- summarise(
  status_counts,
  scheduled_status = "Total",
  n = sum(n),
  percentage = 100
)

scheduled_status_summary <- bind_rows(status_counts, total_row)
```

```{r}
# Table: count and percentage of subdistricts by scheduled status.
kable(
  scheduled_status_summary,
  col.names = c("Scheduled Status", "Number of Subdistricts", "Percentage"),
  caption = "Summary of Scheduled Status of Subdistricts as of 2011 Census Boundaries"
)

```

## Summary table of Fifth Scheduled Subdistricts by State

```{r, include=FALSE}
library(tidyr)

# States that contain at least one Fifth Schedule subdistrict.
states_with_fifth_schedule <- scheduled_data %>%
  filter(scheduled_status == "Fifth Schedule") %>%
  distinct(state_name)

# Per state and status, the number of subdistricts (long format).
fifth_state_counts <- scheduled_data %>%
  filter(state_name %in% states_with_fifth_schedule$state_name) %>%
  group_by(state_name, scheduled_status) %>%
  summarise(count = n(), .groups = "drop")

# One row per state: a count column per status (0 where a status is absent),
# then the total and the percentage split.
state_fifth_schedule_summary <- fifth_state_counts %>%
  pivot_wider(
    names_from = scheduled_status,
    values_from = count,
    values_fill = 0
  ) %>%
  rename(
    Scheduled = `Fifth Schedule`,
    Not_Scheduled = `Not Scheduled`
  ) %>%
  mutate(
    Total = Scheduled + Not_Scheduled,
    Scheduled_Percentage = round((Scheduled / Total) * 100, 2),
    Not_Scheduled_Percentage = round((Not_Scheduled / Total) * 100, 2)
  )
```

```{r}
# Table: scheduled vs. not scheduled subdistricts in each Fifth Schedule state.
kable(
  state_fifth_schedule_summary,
  col.names = c(
    "State",
    "Scheduled Subdistricts",
    "Not Scheduled Subdistricts",
    "Total Subdistricts",
    "Scheduled (%)",
    "Not Scheduled (%)"
  ),
  caption = "Summary of Subdistricts by State for Fifth Schedule as of 2011 Census Boundaries"
)
```


## Summary table of Sixth Scheduled Subdistricts by State

```{r, include=FALSE}
# States that contain at least one Sixth Schedule subdistrict.
states_with_sixth_schedule <- scheduled_data %>%
  filter(scheduled_status == "Sixth Schedule") %>%
  distinct(state_name)

# Restrict the coding table to those states.
sixth_state_rows <- scheduled_data %>%
  filter(state_name %in% states_with_sixth_schedule$state_name)

# One row per state: scheduled / not scheduled counts, total, and shares.
state_sixth_schedule_summary <- sixth_state_rows %>%
  group_by(state_name) %>%
  summarise(
    Scheduled = sum(scheduled_status == "Sixth Schedule"),
    Not_Scheduled = sum(scheduled_status == "Not Scheduled"),
    .groups = "drop"
  ) %>%
  mutate(
    Total = Scheduled + Not_Scheduled,
    Scheduled_Percentage = round((Scheduled / Total) * 100, 2),
    Not_Scheduled_Percentage = round((Not_Scheduled / Total) * 100, 2)
  )
```

```{r}
# Table: scheduled vs. not scheduled subdistricts in each Sixth Schedule state.
kable(
  state_sixth_schedule_summary,
  col.names = c(
    "State",
    "Scheduled Subdistricts",
    "Not Scheduled Subdistricts",
    "Total Subdistricts",
    "Scheduled (%)",
    "Not Scheduled (%)"
  ),
  caption = "Summary of Subdistricts by State for Sixth Schedule as of 2011 Census Boundaries"
)
```

\newpage

# Fifth and Sixth Scheduled Areas by Year of Latest Changes of State's Scheduled Area Classification

```{r, fig.width=16, fig.height=20}
# Map of Fifth and Sixth Schedule subdistricts coloured by the year of the
# state's latest scheduled-area order, with state outlines on top.

# Treat the order year as a discrete category so it takes a manual palette.
scheduled_map$latest_state_schedule_order <- as.factor(scheduled_map$latest_state_schedule_order)

# One colour per order year; the names double as the legend breaks.
order_year_colours <- c(
  "1955" = "#00008B",
  "1972" = "blue",
  "1975" = "#4169E1",
  "1977" = "#ADD8E6",
  "1981" = "lightyellow",
  "1985" = "yellow",
  "1986" = "darkorange",
  "2003" = "red",
  "2007" = "darkred"
)

# Border widths are set with `linewidth`: from ggplot2 3.4.0 onwards `size`
# only scales point geometries in geom_sf(), so `size` no longer makes the
# state borders thicker than the subdistrict borders.
schedule_order_map <- ggplot() +
  geom_sf(
    data = subset(scheduled_map, scheduled_status %in% c("Fifth Schedule", "Sixth Schedule")),
    aes(fill = latest_state_schedule_order),
    color = "darkgrey",
    linewidth = 0.2
  ) +
  geom_sf(data = state_borders, fill = NA, color = "black", linewidth = 1) +
  scale_fill_manual(
    values = order_year_colours,
    breaks = names(order_year_colours),
    labels = names(order_year_colours)
  ) +
  labs(
    title = "Scheduled Subdistricts by State's Latest Scheduled Area Order Year",
    subtitle = "Pratik Mahajan (2024)",
    fill = "Scheduled Area Order Year"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.text = element_text(size = 20),
    plot.title = element_text(hjust = 0.5, size = 30),
    plot.subtitle = element_text(hjust = 0.5, size = 24),
    plot.caption = element_text(size = 12),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank()
  )

# Render the figure in the document.
schedule_order_map

# Save the same plot object explicitly rather than relying on last_plot().
ggsave(
  "subdistricts_year_change_in_scheduled_area_map.png",
  plot = schedule_order_map,
  width = 14,
  height = 10,
  dpi = 300
)
```

\newpage
```{r, include=FALSE}
# States that contain at least one Fifth or Sixth Schedule subdistrict.
states_with_schedules <- scheduled_data %>%
  filter(scheduled_status %in% c("Sixth Schedule", "Fifth Schedule")) %>%
  distinct(state_name)

# One row per such state: the largest (latest) schedule order year found among
# its subdistricts, ignoring missing values, sorted by year ascending.
latest_schedule_order_summary <- scheduled_data %>%
  filter(state_name %in% states_with_schedules$state_name) %>%
  group_by(state_name) %>%
  summarise(
    latest_state_schedule_order = max(latest_state_schedule_order, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(as.numeric(latest_state_schedule_order))

```


```{r}
# Table: latest scheduled-area order year for each scheduled state.
kable(
  latest_schedule_order_summary,
  col.names = c("State", "Latest State Schedule Order"),
  caption = "Latest State Schedule Order for States with Fifth or Sixth Schedule"
)
```
\newpage

# Fifth Scheduled Areas by Year First PESA Election Held in State

```{r, include=FALSE}
# Treat the first-election year as a discrete category so the map below can
# assign it a manual fill palette.
scheduled_map[["pesa_first_election_in_state_year"]] <-
  as.factor(scheduled_map[["pesa_first_election_in_state_year"]])
```


```{r, fig.width=16, fig.height=20}
# Map of Fifth Schedule subdistricts coloured by the year of the state's first
# panchayat election under PESA, with state outlines on top.

# One colour per election year; the names double as the legend breaks.
election_year_colours <- c(
  "2000" = "#00008B",
  "2001" = "#4169E1",
  "2002" = "#87CEEB",
  "2005" = "orange",
  "2007" = "red",
  "2010" = "darkred"
)

# Border widths are set with `linewidth`: from ggplot2 3.4.0 onwards `size`
# only scales point geometries in geom_sf(), so `size` no longer makes the
# state borders thicker than the subdistrict borders.
pesa_first_election_map <- ggplot() +
  geom_sf(
    data = subset(scheduled_map, scheduled_status == "Fifth Schedule"),
    aes(fill = pesa_first_election_in_state_year),
    color = "darkgrey",
    linewidth = 0.2
  ) +
  geom_sf(data = state_borders, fill = NA, color = "black", linewidth = 1) +
  scale_fill_manual(
    values = election_year_colours,
    breaks = names(election_year_colours),
    labels = names(election_year_colours)
  ) +
  labs(
    title = "Fifth Scheduled Subdistricts by \n State's First PESA Election Year",
    subtitle = "Pratik Mahajan (2024)",
    fill = "State's First PESA Election Year"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.text = element_text(size = 20),
    plot.title = element_text(hjust = 0.5, size = 30),
    plot.subtitle = element_text(hjust = 0.5, size = 24),
    plot.caption = element_text(size = 12),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank()
  )

# Render the figure in the document.
pesa_first_election_map

# Save the same plot object explicitly rather than relying on last_plot().
ggsave(
  "subdistricts_pesa_first_election_map.png",
  plot = pesa_first_election_map,
  width = 14,
  height = 10,
  dpi = 300
)
```

\newpage

```{r, include=FALSE}
# States that contain at least one Fifth Schedule subdistrict.
states_with_fifth_schedule <- scheduled_data %>%
  filter(scheduled_status %in% c("Fifth Schedule")) %>%
  distinct(state_name)

# One row per such state: the largest first-election year recorded among its
# subdistricts (missing values ignored), sorted by year ascending.
pesa_first_election_in_state_year_summary <- scheduled_data %>%
  filter(state_name %in% states_with_fifth_schedule$state_name) %>%
  group_by(state_name) %>%
  summarise(
    pesa_first_election_in_state_year = max(pesa_first_election_in_state_year, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(as.numeric(pesa_first_election_in_state_year))

```


```{r}
# Table: year of the first panchayat election under PESA, per state.
kable(
  pesa_first_election_in_state_year_summary,
  col.names = c("State", "First Election Under PESA Year"),
  caption = "Fifth Schedule States by Year of First Panchayat Election Under PESA"
)
```
\newpage

# Fifth Scheduled Areas by Year State Published PESA rules for Wider Implementation

While the year of the first panchayat election under PESA signals the initiation of the law in the state, a more accurate threshold for the commencement of widespread implementation of the PESA is the year in which the state released its PESA rules. These rules were deemed necessary because actual implementation of the PESA has been sluggish in the absence of clear directives. Two Fifth Schedule states, Odisha and Jharkhand, are yet to release these rules. Both states continue to see protests from Scheduled Tribe activists demanding that the state publish these rules, without which PESA implementation remains unrealized.


```{r, include=FALSE}
# States that contain at least one Fifth Schedule subdistrict.
states_with_fifth_schedule <- scheduled_data %>%
  filter(scheduled_status %in% c("Fifth Schedule")) %>%
  distinct(state_name)

# One row per such state: the maximum rules-publication value recorded among
# its subdistricts (missing values ignored), sorted by its numeric value.
# NOTE(review): the map palette for this variable includes the label
# "Not_Published_by_2024"; if this column holds that text, as.numeric() turns
# it into NA (with a coercion warning) and those states sort last - confirm.
pesa_rules_published_state_year_summary <- scheduled_data %>%
  filter(state_name %in% states_with_fifth_schedule$state_name) %>%
  group_by(state_name) %>%
  summarise(
    pesa_rules_published_state_year = max(pesa_rules_published_state_year, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  arrange(as.numeric(pesa_rules_published_state_year))

```


```{r}
# Table: year each Fifth Schedule state published its PESA rules.
# The caption previously ended with a leftover fragment copied from the
# first-election table ("... First Panchayat Election Under PESA").
kable(
  pesa_rules_published_state_year_summary,
  col.names = c("State", "PESA Rules Published Year"),
  caption = "Fifth Schedule States by Year State Published PESA Rules"
)
```

```{r, fig.width=16, fig.height=10, include=FALSE}
# Treat the rules-publication year as a discrete category so the map below can
# assign it a manual fill palette.
scheduled_map[["pesa_rules_published_state_year"]] <-
  as.factor(scheduled_map[["pesa_rules_published_state_year"]])
```

\newpage 

```{r, fig.width=16, fig.height=20}
# Map of Fifth Schedule subdistricts coloured by the year the state published
# its PESA rules, with state outlines on top.

# One colour per publication value; the names double as the legend breaks.
rules_year_colours <- c(
  "2011" = "#00008B",
  "2014" = "#4169E1",
  "2017" = "#87CEEB",
  "2022" = "yellow",
  "Not_Published_by_2024" = "red"
)

# Border widths are set with `linewidth`: from ggplot2 3.4.0 onwards `size`
# only scales point geometries in geom_sf(), so `size` no longer makes the
# state borders thicker than the subdistrict borders.
pesa_rules_map <- ggplot() +
  geom_sf(
    data = subset(scheduled_map, scheduled_status == "Fifth Schedule"),
    aes(fill = pesa_rules_published_state_year),
    color = "darkgrey",
    linewidth = 0.2
  ) +
  geom_sf(data = state_borders, fill = NA, color = "black", linewidth = 1) +
  scale_fill_manual(
    values = rules_year_colours,
    breaks = names(rules_year_colours),
    labels = names(rules_year_colours)
  ) +
  labs(
    title = "Fifth Scheduled Subdistricts by PESA Rules Published \n by State Year, Indicating Wider Implementation",
    subtitle = "Pratik Mahajan (2024)",
    fill = "Rules Published Year"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    legend.text = element_text(size = 20),
    plot.title = element_text(hjust = 0.5, size = 30),
    plot.subtitle = element_text(hjust = 0.5, size = 24),
    plot.caption = element_text(size = 12),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank()
  )

# Render the figure in the document.
pesa_rules_map

# Save the same plot object explicitly rather than relying on last_plot().
ggsave(
  "subdistricts_state_pesa_rules_publication_year_map.png",
  plot = pesa_rules_map,
  width = 14,
  height = 10,
  dpi = 300
)
```

```{r}
library(knitr)
library(kableExtra)
library(webshot)

# Save a kable table as an image file.
#
# The table is first written to a temporary HTML file with save_kable(), and
# that file is then captured with webshot() using a 1400 x 800 viewport.
#
# Arguments:
#   kable_input  table object accepted by save_kable().
#   file_name    path of the image file to create.
#
# Returns whatever webshot() returns.
save_kable_as_image <- function(kable_input, file_name) {
  temp_file <- tempfile(fileext = ".html")
  # Remove the intermediate HTML once the screenshot is taken (or on error),
  # so repeated calls do not accumulate temp files.
  on.exit(unlink(temp_file), add = TRUE)

  save_kable(kable_input, temp_file)
  webshot(temp_file, file_name, vwidth = 1400, vheight = 800)
}

# Export each summary table shown in the document as a standalone PNG.
#
# `format = "html"` is requested explicitly: save_kable_as_image() writes the
# table to a temporary .html file and screenshots it, but when this document
# is knitted to PDF kable() may otherwise produce a LaTeX (or pipe) table that
# cannot be rendered that way.
export_table_png <- function(tbl, headers, caption, file_name) {
  styled_table <- kable(tbl, format = "html", col.names = headers, caption = caption) %>%
    kable_styling(full_width = FALSE, font_size = 24)
  save_kable_as_image(styled_table, file_name)
}

# Column headers shared by the two per-state scheduled/not-scheduled tables.
state_split_headers <- c(
  "State",
  "Scheduled Subdistricts",
  "Not Scheduled Subdistricts",
  "Total Subdistricts",
  "Scheduled (%)",
  "Not Scheduled (%)"
)

export_table_png(
  scheduled_status_summary,
  headers = c("Scheduled Status", "Number of Subdistricts", "Percentage"),
  caption = "Summary of Scheduled Status of Subdistricts as of 2011 Census Boundaries",
  file_name = "scheduled_status_summary.png"
)

export_table_png(
  state_fifth_schedule_summary,
  headers = state_split_headers,
  caption = "Summary of Subdistricts by State for Fifth Schedule as of 2011 Census Boundaries",
  file_name = "fifth_schedule_state_summary.png"
)

export_table_png(
  state_sixth_schedule_summary,
  headers = state_split_headers,
  caption = "Summary of Subdistricts by State for Sixth Schedule as of 2011 Census Boundaries",
  file_name = "sixth_schedule_state_summary.png"
)

export_table_png(
  latest_schedule_order_summary,
  headers = c("State", "Latest State Schedule Order"),
  caption = "Latest State Schedule Order for States with Fifth or Sixth Schedule",
  file_name = "schedule_order_summary.png"
)

export_table_png(
  pesa_first_election_in_state_year_summary,
  headers = c("State", "First Election Under PESA Year"),
  caption = "Fifth Schedule States by Year of First Panchayat Election Under PESA",
  file_name = "pesa_first_election_in_state_summary.png"
)

# Caption fixed: it previously ended with a fragment copied from the
# first-election table ("... First Panchayat Election Under PESA").
export_table_png(
  pesa_rules_published_state_year_summary,
  headers = c("State", "PESA Rules Published Year"),
  caption = "Fifth Schedule States by Year State Published PESA Rules",
  file_name = "pesa_rules_published_state_year_summary.png"
)

```

